/*==================================================
project:       Targeting - BootStrap FUN
Author:        David L. Vargas 
E-email:       davidvar@iadb.org
url:           
Dependencies:  
----------------------------------------------------
Creation Date:    
Modification Date:   
Do-file version:    01
References:          
Output:             
==================================================*/


 /*==================================================
               0: Program set up
 ==================================================*/
 
// Point estimates FUN 

cap program drop bstrap_pmt_mh2 
program define bstrap_pmt_mh2, rclass
    syntax [anything(name=subcmd id="subcommand")],  ///
    [                                                ///
                incvar(string)                       ///
                variables(string)                    ///
                allwaysvars(string)                  ///
				predvars(string)                     ///
				pmtvars(string)                      /// 
                Reps(string)                         ///
                FIXTrans(string)                     ///
				mhazard(string)      			  	 ///
				gainvars(string) 					 ///
				lossvars(string)   					 ////
				base(string)  						 ///
                q_l(string)                          ///
                q_u(string)                          ///
                clear                                ///
                pause                               ///
    ] 


    //========================== Preparation ========================== //
    cap frame drop estts 
    cap frame drop results 
    scalar epovline = 137350
    scalar l_epovline = log(epovline)

    tempvar pred_change

    *----------- 1.3 Define budget
    scalar prog_budget_colombia = 1997181822314 / 12  // Transfer expenses from (annual)
    scalar nfamilies_colombia = 50372424 / 2.9 // inhabitans by average family size
    scalar	ph_budget = prog_budget_colombia / nfamilies_colombia

    //========================== ESTIMATION ========================== //

    _dots 0, title(Loop running) reps(`reps') // use dot's intead of a message to go faster

    // determine sample in the margin (to randomise)
    tempvar rel_sample0
    tempvar rel_sample
    gen `rel_sample0'=(l_pp_inc_srv >= `q_l'*l_epovline & l_pp_inc_srv < `q_u'*l_epovline) if year==2020
    egen `rel_sample'=max(`rel_sample0'),by(id_hogar_SISBEN)

    qui {
    frame create estts 
    forv x = 1/`reps' {
        preserve 

        /*==================================================
            1: Model and income estiamtion
        ==================================================*/

        *---------- 1.1. Split sample
        *sort seed `=339487731+`x'' // ajustar esto (semilla para sort)

        sort id_vivienda_SISBEN
        set seed `=339487731+`x'-1'
        
        // An uniform have 50% probably of being > 0.5, let's use that for the split
        tempvar _sample_srv
        tempvar _random 
        tempvar _aux
        gen  `_random' = runiform() if year == 2019
        bys id_hogar_SISBEN : egen `_aux' = max(`_random')
        replace `_random' = `_aux'
        gen `_sample_srv' = 1
        replace `_sample_srv' = 2 if `_random' > 0.5
        lab var `_sample_srv' "Training or testing sample"
        lab define lsmaple 1 "Training" 2 "Testing", replace
        lab values `_sample_srv' lsmaple
        drop `_random' `_aux'
        
        *---------- 1.2. model estimation 
		// static PMT: 
		
		reg `pmtvars' [aw=pondera] if `_sample_srv'==1 & year==2019

		
		foreach var in `predvars' {
			tempvar aux
			gen  `_aux'=`var' if year==2019
			tempvar maux
			egen `maux'=mean(`_aux'), by(id_hogar_SISBEN)
			replace `var'=`maux'
			drop `maux' `_aux'
		}
		
		tempvar l_pred_inc_srv_PMT
        predict `l_pred_inc_srv_PMT' if `_sample_srv' == 2  , xb
		
        tempvar l_pred_change

        // model estimation 
        if ("`subcmd'" == "lasso") {
            // lassso estimation
            if (`x' == 1 | `x' == `reps')   {
                noi di in blue "Using Lasso estimates"
                if ("`allwaysvars'" != "") noi di in red "Allways vars: `allwaysvars'"
            }
            lasso linear `incvar'  (`allwaysvars') `variables' [iw=pondera] if year == 2020 & `_sample_srv' == 1 & !missing(l_pp_inc_srv), nocons    
		
		    // moral hazard correction

            * define no gain and no loss status
            foreach dty in gain loss {
                loc `dty'zero ""
                loc `dty'nomissing  ""
                loc i = 0
                foreach v of local `dty'vars {
                    loc ++i
                    if `i' == 1{
                        loc `dty'zero       `"`v' == 0"'
                        loc `dty'nomissing  `"!missing(`v')"'
                    }
                    else {
                        loc `dty'zero      `" ``dty'zero' & `v' == 0"'
                        loc `dty'nomissing `" ``dty'zero' & !missing(`v')"'

                    }   
                }
            }

            * Select liers at random 
            g aux1=runiform() if `gainnomissing' & `lossnomissing' & year > 2019
            count if aux1 < `mhazard' & year == 2020   // number of people = mh %
            local ns = r(N)       // save in a macro
            sort aux1             // organise by random number       
            bys `rel_sample' (aux1) year: gen aux2 = _n <= `ns' // flag a N people within each quintile at random
            
            * Not reported gain:
            foreach v in `gainvars' {
                replace `v'=0 if `v'==1 & aux2 == 1 & `rel_sample'==1
            }
            * fake loss
            foreach v in `lossvars' { 
                replace `v'=1 if `losszero' & aux2 == 1 & `rel_sample'==1
                * set gain to zero if possitive (double liers)
                foreach vg in `gainvars' {
                    replace `vg'=0 if `vg'==1 & aux2 == 1 & `rel_sample'==1
                    
                }
            }

            drop aux1 aux2

            // prediction
	
		    if `e(k_nonzero_sel)' == 0 {
                * this is a saveguard in case no coefficient is selected
                di as error "No coefficient selected by lasso algorithm - static assumed"
                gen `l_pred_change' = 0 if `_sample_srv' == 2  
            } 
            else { 	
                predict `l_pred_change' if `_sample_srv' == 2
            }
        }
        else {
            // OLS estimation
            reg `incvar' `variables' [aw=pondera] if year == 2020 & `_sample_srv' == 1 & !missing(l_pp_inc_srv), nocons robust
			
            
            * define no gain and no loss status
            foreach dty in gain loss {
                loc `dty'zero ""
                loc `dty'nomissing  ""
                loc i = 0
                foreach v of local `dty'vars {
                    loc ++i
                    if `i' == 1{
                        loc `dty'zero       `"`v' == 0"'
                        loc `dty'nomissing  `"!missing(`v')"'
                    }
                    else {
                        loc `dty'zero      `" ``dty'zero' & `v' == 0"'
                        loc `dty'nomissing `" ``dty'zero' & !missing(`v')"'

                    }   
                }
            }

            * Select liers at random 
            g aux1=runiform() if `gainnomissing' & `lossnomissing' & year > 2019
            count if aux1 < `mhazard' & year == 2020   // number of people = mh %
            local ns = r(N)       // save in a macro
            sort aux1             // organise by random number       
            bys `rel_sample' (aux1) year: gen aux2 = _n <= `ns' // flag a N people within each quintile at random
            
            * Not reported gain:
            foreach v in `gainvars' {
                replace `v'=0 if `v'==1 & aux2 == 1 & `rel_sample'==1
            }
            * fake loss
            foreach v in `lossvars' { 
                replace `v'=1 if `losszero' & aux2 == 1 & `rel_sample'==1
                * set gain to zero if possitive (double liers)
                foreach vg in `gainvars' {
                    replace `vg'=0 if `vg'==1 & aux2 == 1 & `rel_sample'==1
                    
                }
            }

            drop aux1 aux2
				
			predict `l_pred_change' if `_sample_srv' == 2
        }
        
        *--------- 1.3. Poverty prediction 

        tempvar pred_income_srv_shock
        tempvar l_pred_inc_srv_shock

        *predict `l_pred_change' if `_sample_srv' == 2
        *noi sum `l_pred_change'
        gen `pred_income_srv_shock' = exp(`l_pred_inc_srv_PMT') + `l_pred_change'
        gen `l_pred_inc_srv_shock'= log(`pred_income_srv_shock')
        replace `l_pred_inc_srv_shock' = `l_pred_inc_srv_PMT' if year == 2019
        replace `l_pred_inc_srv_shock' = . if `_sample_srv' == 1
        *noi sum `l_pred_inc_srv_shock'

        /*==================================================
            2: Inclusion and exclusion change
        ==================================================*/

        *--------- 2.1. Poverty prediction 
        
        tempvar targeted
        tempvar covered0

        // Targets
        gen `targeted' = l_pp_inc_srv < l_epovline 
        replace `targeted' = . if l_pp_inc_srv == .

        // Coverage 
        gen `covered0' = `l_pred_inc_srv_shock' < l_epovline // POVERTY LINE 
        replace `covered0'  = . if missing(`l_pred_inc_srv_shock')

        *--------- 2.1. inclusion and exclusion error
        tempvar inclusion_err0 inclusion_ok0 exclusion_err0 exclusion_ok0
        gen `inclusion_err0' = (`covered0' == 1) if !missing(`l_pred_inc_srv_shock') & `targeted' == 0 
        gen `inclusion_ok0' =  (`covered0' == 1) if !missing(`l_pred_inc_srv_shock') & `targeted' == 1
        gen `exclusion_err0' = (`covered0' == 0) if !missing(`l_pred_inc_srv_shock') & `targeted' == 1
        gen `exclusion_ok0' =  (`covered0' == 0) if !missing(`l_pred_inc_srv_shock') & `targeted' == 0
        
        loc k =0
        forv y = 2019/2021 {
            loc ++k
            mat estt = J(1,13,.)
            mat estt[1,1] = `y'

            loc j = 0
            foreach v in `covered0' `inclusion_err0' `inclusion_ok0' `exclusion_err0' `exclusion_ok0' {
                loc ++j
                sum `v' [aw = pondera] if year == `y' & !missing(`l_pred_inc_srv_shock') & `_sample_srv' == 2
                scalar `v'_`y' = r(mean)
                mat estt[1,`=`j'+1'] = `v'_`y' 
            }

            // ------- CRRA 

            keep if `_sample_srv' == 2

            * Define base
            sum `covered0' [aw=pondera] if !missing(`l_pred_inc_srv_shock') & year == `y' & `_sample_srv' == 2
            loc p_cov = r(mean)

            count if !missing(`l_pred_inc_srv_shock') & year == `y' & `_sample_srv' == 2
            scalar base = r(N) * `p_cov' 
            *noi di as result base 
            
            *----------- 2.3 transfer size and budget 
            
            count if `l_pred_inc_srv_shock' != . & year == `y' & `_sample_srv' == 2
            scalar ssize = r(N)

            if ("`fixtrans'" != "")  {
                tempvar hh_benefit
                gen `hh_benefit' = `fixtrans'
                replace `hh_benefit' = `hh_benefit' * `covered0' 

                * store value
                scalar budget = `fixtrans' * base                  // Budget within usable sample 
                scalar budget_nat = (nfamilies_colombia * `p_cov') * `fixtrans' // National budget equivalency
                sca hh_benefits = `fixtrans'

            }
            else {
                * update budget 
                scalar budget = ph_budget * ssize                  // Budget within usable sample 
                scalar budget_nat = nfamilies_colombia * ph_budget // National budget equivalency

                tempvar hh_benefit
                gen `hh_benefit' = budget / base	if year == `y' & `_sample_srv' == 2
                replace `hh_benefit' = 0 if base == 0 & year == `y' & `_sample_srv' == 2
                replace `hh_benefit' = `hh_benefit' * `covered0' if year == `y'
                
                * store value
                sca hh_benefits = budget / base	
            }
            
            tempvar pp_benefits_c
            gen `pp_benefits_c' = `hh_benefit' / n_members_srv if year == `y' & `_sample_srv' == 2
            replace `pp_benefits_c' = `pp_benefits_c' * `covered0' if year == `y' & `_sample_srv' == 2

            

            * total HH income
            tempvar income_hh
            gen `income_hh' = `base'+income_pp_nt_win + `pp_benefits_c' if !missing(`l_pred_inc_srv_shock') & year == `y' & `_sample_srv' == 2

            *----------- 2.3 Estiamtion CRRA by years
			
			scalar rho=1.5
            tempvar crra_l 
            gen `crra_l' = (((`income_hh')^(1-rho))*2)/(1-rho) if !missing(`l_pred_inc_srv_shock') & `_sample_srv' == 2
	        sum `crra_l' [aw = pondera] if year == `y'
            sca totalU_`y'_l = r(sum)
			
			scalar rho=3
            tempvar crra 
            gen `crra' = (((`income_hh')^(1-rho))*2)/(1-rho) if !missing(`l_pred_inc_srv_shock') & `_sample_srv' == 2
	        sum `crra' [aw = pondera] if year == `y'
            sca totalU_`y' = r(sum)
			
			scalar rho=4.5
            tempvar crra_u 
            gen `crra_u' = (((`income_hh')^(1-rho))*2)/(1-rho) if !missing(`l_pred_inc_srv_shock') & `_sample_srv' == 2
	        sum `crra_u' [aw = pondera] if year == `y'
            sca totalU_`y'_u = r(sum)
            *noi di r(sum)
            
            // same but with my crra FUN
			
           * welfareho if `_sample_srv' == 2, incvar(income_pp_nt_win) benvar(`pp_benefits_c') w(pondera) p(rho) base(`base')
            *scalar ud_`y' = r(CRRA)

            // Share of hhs with 0 income after transfers. 
			
			tempvar zero_post
			gen `zero_post'=(`income_hh'<=`base') if !missing(`l_pred_inc_srv_shock')  & `_sample_srv' == 2
			sum `zero_post' [aw = pondera] if  year == `y' & `_sample_srv' == 2
			scalar zero_`y'=r(mean)
			// Share of hhs under extreme poverty. 
			
			tempvar epov_post
			gen `epov_post'=(`income_hh'-`base'<epovline) if !missing(`l_pred_inc_srv_shock')  & `_sample_srv' == 2
			sum `epov_post' [aw = pondera] if  year == `y' & `_sample_srv' == 2
			scalar epov_`y'=r(mean)
			
			
           
            * Store in matrix
            loc ++j
            mat estt[1,`=`j'+1'] = totalU_`y'
			mat estt[1,`=`j'+2'] = totalU_`y'_l
			mat estt[1,`=`j'+3'] = totalU_`y'_u
			mat estt[1,`=`j'+4'] = hh_benefits
            mat estt[1,`=`j'+5'] = budget_nat
			mat estt[1,`=`j'+6'] = zero_`y'
			mat estt[1,`=`j'+7'] = epov_`y'

            if (`y' == 2019)		mat dat = estt
            else 				    mat dat = dat \ estt

            mat colnames dat = year covered inclusion_err inclusion_ok exclusion_err exclusion_ok crra crra_l crra_u  hh_benefits budget_nat zero epov 

        }

        *----------  1.2. store in a dataset
        frame change estts  
        drop _all
        svmat dat, names(col)
        gen n_rep = `x'
        if `x' > 1 {
            append using `estt'
        } 
        
        tempfile estt
        save `estt', replace 
        
        frame change default
        restore 

        loc t_reps = `x'
        *noi di as result "Rep `x' of `reps' done!"

        noi _dots `x' 0

    } // end of reps loop

    frame create results 
    frame change results 

    use `estt', clear 

    mkmat _all , matrix(preds)

    foreach v in covered inclusion_err inclusion_ok exclusion_err exclusion_ok crra crra_l crra_u hh_benefits budget_nat zero epov {
        forv y = 2019/2021 {
            sum `v' if year == `y', det
            scalar lbci_`v'_`y' = r(p5)
            scalar ubci_`v'_`y' = r(p95) 
        }
        
    }

    collapse (mean)  covered inclusion_err inclusion_ok exclusion_err exclusion_ok crra crra_l crra_u hh_benefits budget_nat zero epov , by(year)
    

    foreach v in covered inclusion_err inclusion_ok exclusion_err exclusion_ok crra crra_l crra_u hh_benefits budget_nat zero epov {
        
        gen `v'_uci = .
        gen `v'_lci = .
        
        forv y = 2019/2021 {
            replace `v'_uci = ubci_`v'_`y' if year == `y'
            replace `v'_lci = lbci_`v'_`y' if year == `y'
        }
        
    }

    mkmat _all , matrix(estts)

    frame change default
    frame drop estts 
    
    // poverty prediction
    return matrix preds = preds
    return matrix estt = estts
    return scalar n_reps = `t_reps'

    } // end quietly 

    noi di as result "Results stored in matrix r(estt)"
    
end 

